package net.ming616.nlp.extraction.service.impl;

import java.net.URL;
import java.util.ArrayList;
import java.util.List;

import net.ming616.nlp.extraction.model.JingdongProduct;
import net.ming616.nlp.extraction.service.JingdongExtractor;

import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;

/**
 * {@link JingdongExtractor} implementation that downloads pages with jsoup and
 * reads product data out of the resulting DOM using CSS selectors.
 *
 * <p>
 * Both methods are best-effort: fetch/parse failures are logged (with the
 * full stack trace) and never propagated to the caller.
 */
@Service("jingdongExtractor")
public class JingdongExtractorImpl implements JingdongExtractor {
	/**
	 * Logger for this class
	 */
	private static final Logger logger = Logger
			.getLogger(JingdongExtractorImpl.class);

	/**
	 * Timeout, in milliseconds, passed to {@code Jsoup.parse(URL, int)} for
	 * every page fetch.
	 */
	private static final int FETCH_TIMEOUT_MILLIS = 6000;

	/**
	 * Fetches a product page and extracts its name and attribute table.
	 *
	 * <p>
	 * The name is the text of the first element matching {@code #name}. Each
	 * element matching {@code #detail .tdTitle} is treated as an attribute
	 * name, and the text of its next sibling element as the attribute value;
	 * titles without a following sibling element are skipped.
	 *
	 * @param productURL
	 *            URL of the product page to fetch
	 * @return a product object, never {@code null}. If fetching or extraction
	 *         fails, the error is logged and the product is returned with
	 *         whatever had been populated up to that point (possibly nothing,
	 *         since the URL is only set after a successful fetch).
	 */
	public JingdongProduct getProduct(String productURL) {
		JingdongProduct product = new JingdongProduct();
		try {
			Document doc = Jsoup.parse(new URL(productURL),
					FETCH_TIMEOUT_MILLIS);
			Element name = doc.select("#name").first();
			product.setUrl(productURL);
			if (null != name) {
				product.setName(name.text());
			}
			Elements titles = doc.select("#detail .tdTitle");
			for (Element title : titles) {
				String attributeName = title.text();
				Element valueElement = title.nextElementSibling();
				if (null != valueElement) {
					String attributeValue = valueElement.text();
					product.getAttributes().put(attributeName, attributeValue);
				}
			}
		} catch (Exception e) {
			// Pass the throwable so the stack trace is logged, not just the
			// message.
			logger.error(e.getMessage(), e);
		}
		return product;
	}

	/**
	 * Fetches a listing page and collects the {@code href} of every link
	 * matching {@code #plist li .p-name a}.
	 *
	 * <p>
	 * The {@code href} values are returned exactly as they appear in the
	 * markup; they are not resolved against the page URL.
	 *
	 * @param baseURL
	 *            URL of the listing page to fetch
	 * @return the link targets in document order; an empty list if the page
	 *         could not be fetched or parsed (the error is logged). Never
	 *         {@code null}.
	 */
	public List<String> getProductURLList(String baseURL) {
		List<String> urls = new ArrayList<String>();
		Document doc;
		try {
			doc = Jsoup.parse(new URL(baseURL), FETCH_TIMEOUT_MILLIS);
		} catch (Exception e) {
			logger.error(e.getMessage(), e);
			// Without a document there is nothing to select from; returning
			// here avoids dereferencing a null document below.
			return urls;
		}
		Elements items = doc.select("#plist li .p-name a");
		for (Element item : items) {
			urls.add(item.attr("href"));
		}
		return urls;
	}

}
